///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2003-5 University of Edinburgh (Michael White) and Gunes Erkan
// 
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// 
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
// 
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//////////////////////////////////////////////////////////////////////////////

package opennlp.ccg.grammar;

import opennlp.ccg.lexicon.*;
import opennlp.ccg.util.*;
import opennlp.ccg.synsem.*;
import opennlp.ccg.hylo.*;
import opennlp.ccg.parse.Parser;
import opennlp.ccg.parse.ParseException;
import opennlp.ccg.realize.Realizer;

import org.jdom.*;
import org.jdom.input.*;
import org.jdom.output.*;
import org.jdom.transform.*;
import org.xml.sax.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;
import javax.xml.transform.sax.*;
import java.io.*;
import java.net.URL;
import java.util.*;

/**
 * A CCG grammar is essentially a lexicon plus a rule group.
 * A grammar may also have sequences of transformations to use in 
 * loading/saving LFs from/to XML.
 *
 * @author  Michael White
 * @author  Gunes Erkan
 * @version $Revision: 1.40 $, $Date: 2007/09/27 17:57:18 $ 
 */
public class Grammar {

    /** The lexicon. */
    public final Lexicon lexicon;
    
    /** The rule group. */
    public final RuleGroup rules;

    /** The type hierarchy. */
    public final Types types;
    
    /** The features to include in supertags. */
    public final Set<String> supertagFeatures = new HashSet<String>();
    
    /** The sequence of transformations to use when loading LFs from XML. */
    public final URL[] fromXmlTransforms;
    
    /** The sequence of transformations to use when saving LFs to XML. */
    public final URL[] toXmlTransforms;

    /** Preferences for displaying elements in this grammar. */
    public DisplayPrefs prefs = new DisplayPrefs();
   
    /** For access to the current grammar; should be generalized eventually. */
    public static Grammar theGrammar;
	
    // name of the grammar, from the "name" attribute of the root element (may be null)
    private String grammarName = null;
	
    // parser, for getting parsed words; lazily instantiated in getParsedWords
    private Parser parser = null; 

    // XML factories; lazily instantiated in initializeTransformers
    private SAXParserFactory spf = null; 
    private static SAXTransformerFactory stf = null; 
    
    // transformer for loading/saving LFs from/to XML; lazily instantiated in initializeTransformers
    private Transformer transformer = null;
    
    // transformations for loading/saving LFs from/to XML; 
    // lazily compiled from fromXmlTransforms/toXmlTransforms on first use
    private Templates[] fromXmlTemplates = null;
    private Templates[] toXmlTemplates = null;
    
    // transformer for saving strings to APML; lazily instantiated in initializeTransformers
    private Transformer apmlTransformer = null;
    
    /** The pitch accents recognized as underscored suffixes for translation to APML. */
    public static final String[] pitchAccents = { 
        "H*", "L*", "L+H*", "L*+H", "H*+L", "H+L*"
    };

    // set of pitch accents, lazily built from pitchAccents in isPitchAccent
    private static Set<String> pitchAccentsSet = null;    
    
    /** The boundary tones recognized as separate tokens for translation to APML. */
    public static final String[] boundaryTones = { 
        "L", "H", "LL%", "HH%", "LH%", "HL%"
    };
    
    // set of boundary tones, lazily built from boundaryTones in isBoundaryTone
    private static Set<String> boundaryTonesSet = null;    

    
    /** 
     * Loads a grammar from the given filename.
     *
     * @param filename the grammar XML file to load
     * @throws IOException if the grammar cannot be read or parsed
     */
    public Grammar(String filename) throws IOException {
        // nb: toURI().toURL() escapes special characters correctly, 
        //     unlike the deprecated File.toURL()
        this(new File(filename).toURI().toURL());
    }
    
    /** 
     * Loads a grammar from the given URL: reads the root grammar element and 
     * loads the types, lexicon, morphology and rules files it references, 
     * resolving their URLs relative to the grammar URL.
     */
    public Grammar(URL url) throws IOException {
        // register this instance as the current grammar
        theGrammar = this;
        // read XML
        SAXBuilder builder = new SAXBuilder();
        Document doc;
        try {
            doc = builder.build(url);
        } catch (JDOMException jde) {
            throw (IOException) new IOException().initCause(jde);
        }
        Element root = doc.getRootElement();	// root corresponds to <grammar>
        grammarName = root.getAttributeValue("name");
		
        // read supertag features from the whitespace-separated "feats" attribute, if present
        Element supertagsElt = root.getChild("supertags");
        if (supertagsElt != null) {
            String feats = supertagsElt.getAttributeValue("feats");
            if (feats != null) {
                String[] names = feats.split("\\s+");
                for (int i = 0; i < names.length; i++) {
                    supertagFeatures.add(names[i]);
                }
            }
        }
        if (supertagFeatures.isEmpty()) {
            // default is "form" and "lex"
            supertagFeatures.add("form"); supertagFeatures.add("lex"); 
        }
        
        // instantiate the configured tokenizer class, if any (default: DefaultTokenizer)
        Tokenizer tokenizer = null;
        Element tokenizerElt = root.getChild("tokenizer");
        if (tokenizerElt != null) {
            String tokenizerClass = tokenizerElt.getAttributeValue("classname");
            if (tokenizerClass != null) {
                try {
                    tokenizer = (Tokenizer) Class.forName(tokenizerClass).newInstance();
                } catch (Exception exc) {
                    throw (IOException) new IOException().initCause(exc);
                }
            }
            else tokenizer = new DefaultTokenizer();
            // register any replacement semantic classes with the tokenizer
            String replacementSemClasses = tokenizerElt.getAttributeValue("replacement-sem-classes");
            if (replacementSemClasses != null) {
                String[] semClasses = replacementSemClasses.split("\\s+");
                for (int i = 0; i < semClasses.length; i++) {
                    tokenizer.addReplacementSemClass(semClasses[i]);
                }
            }
        }
        // resolve component file URLs relative to the grammar URL; 
        // nb: types is optional, while lexicon, morphology and rules are assumed present
        Element typesElt = root.getChild("types");
        URL typesUrl;
        if (typesElt != null) {
            typesUrl = new URL(url, typesElt.getAttributeValue("file"));
        }
        else typesUrl = null;
        Element lexiconElt = root.getChild("lexicon");
        URL lexiconUrl = new URL(url, lexiconElt.getAttributeValue("file")); 
        Element morphElt = root.getChild("morphology");
        URL morphUrl = new URL(url, morphElt.getAttributeValue("file"));
        Element rulesElt = root.getChild("rules");
        URL rulesUrl = new URL(url, rulesElt.getAttributeValue("file"));
        // collect LF-from-XML transformation URLs, in document order (empty if absent)
        Element fromXmlElt = root.getChild("LF-from-XML");
        if (fromXmlElt != null) {
            List children = fromXmlElt.getChildren();
            fromXmlTransforms = new URL[children.size()];
            for (int i = 0; i < children.size(); i++) {
                Element transformElt = (Element) children.get(i);
                fromXmlTransforms[i] = new URL(url, transformElt.getAttributeValue("file"));
            }
        } else {
            fromXmlTransforms = new URL[0];
        }
        // collect LF-to-XML transformation URLs, in document order (empty if absent)
        Element toXmlElt = root.getChild("LF-to-XML");
        if (toXmlElt != null) {
            List children = toXmlElt.getChildren();
            toXmlTransforms = new URL[children.size()];
            for (int i = 0; i < children.size(); i++) {
                Element transformElt = (Element) children.get(i);
                toXmlTransforms[i] = new URL(url, transformElt.getAttributeValue("file"));
            }
        } else {
            toXmlTransforms = new URL[0];
        }
        // load type hierarchy, lexicon and rules
        if (typesUrl != null) types = new Types(typesUrl, this);
        else types = new Types(this);
        if (tokenizer != null) lexicon = new Lexicon(this, tokenizer);
        else lexicon = new Lexicon(this);
        lexicon.init(lexiconUrl, morphUrl); 
        rules = new RuleGroup(rulesUrl, this);
        // add observed supertag-rule combos for filtering, if any
        String combosfile = rulesElt.getAttributeValue("combosfile");
        if (combosfile != null) {
        	URL combosUrl = new URL(url, combosfile);
        	rules.loadSupercatRuleCombos(combosUrl);
        }
    }

    
    /**
     * Returns a file url string relative to the user's current directory 
     * for the given filename.
     */
    /**
     * Returns a file url string relative to the user's current directory 
     * for the given filename.
     *
     * @param filename the (possibly relative) filename to convert
     * @return an absolute "file:" URL string for the filename
     * @throws RuntimeException wrapping a MalformedURLException if conversion fails
     */
    public static String convertToFileUrl(String filename) {
        try {
            // nb: toURI().toURL() escapes special characters correctly, 
            //     unlike the deprecated File.toURL()
            return new File(filename).toURI().toURL().toString();
        }
        catch (java.net.MalformedURLException exc) {
            // wrap with the cause attached, preserving its message
            throw new RuntimeException(exc);
        }
    }
    
    
    // initializes factories and transformers
    private void initializeTransformers() throws TransformerConfigurationException {
        // init factories
        if (spf == null) {
            spf = SAXParserFactory.newInstance(); 
            spf.setNamespaceAware(true);
        }
        if (stf == null) {
            stf = (SAXTransformerFactory) TransformerFactory.newInstance();
            try { // try setting indent at factory level
                stf.setAttribute("indent-number", new Integer(2));
            } catch (IllegalArgumentException exc) {} // ignore
        }
        // set up transformer with indenting
        // nb: with some JVMs (eg JDK 1.4.1 on Windows), 
        //     the transformer needs to be reinitialized each time, in order to 
        //     run multiple :r FN commands in tccg 
        if (transformer == null) {
            transformer = stf.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            try { // also try setting indent as a xalan property 
                transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "2");
            } catch (IllegalArgumentException exc) {} // ignore
        }
        // set up apml transformer 
        if (apmlTransformer == null) {
            InputStream toApmlStr = ClassLoader.getSystemResourceAsStream("opennlp/ccg/grammar/to-apml.xsl");
            apmlTransformer = stf.newTransformer(new StreamSource(toApmlStr));
            // nb: DOCTYPE SYSTEM also specified in to-apml.xsl; including  
            //     redundant specification here to workaround omission of DOCTYPE with Linux 1.5 JVM
            apmlTransformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "apml.dtd");
        }
    }
    
    
    // does setup for LF from XML transformation, and returns a SAXSource for the given input stream
    // nb: need a new filter chain one for each use (perhaps due to an underyling bug)
    // does setup for the LF from XML transformation, and returns a SAXSource for the given input stream;
    // nb: a new filter chain is needed for each use (perhaps due to an underlying bug)
    private SAXSource fromXmlSetup(InputStream istream) throws IOException {
        try {
            // initialize transformer
            initializeTransformers();
            // compile the from-XML transformations into templates, on first use
            if (fromXmlTemplates == null) {
                fromXmlTemplates = new Templates[fromXmlTransforms.length];
                for (int i = 0; i < fromXmlTemplates.length; i++) {
                    String url = fromXmlTransforms[i].toString();
                    fromXmlTemplates[i] = stf.newTemplates(new StreamSource(url));
                }
            }
            // set up initial reader
            SAXParser parser = spf.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            // set up chain of filters: one filter per transformation, with the first 
            // filter reading from the initial reader and each subsequent one from its predecessor
            XMLFilter[] filters = new XMLFilter[fromXmlTransforms.length];
            for (int i = 0; i < filters.length; i++) {
                // create filter
                filters[i] = stf.newXMLFilter(fromXmlTemplates[i]);
                // set parent
                if (i == 0) { filters[0].setParent(reader); }
                else { filters[i].setParent(filters[i-1]); }
            }
            // set final reader/filter (just the plain reader when there are no transformations)
            XMLReader finalReader = (filters.length == 0) ? reader : filters[filters.length-1];
            // set up and return LF from XML SAX source with final reader/filter
            return new SAXSource(finalReader, new InputSource(istream));
        } catch (ParserConfigurationException pce) {
            throw (IOException) new IOException().initCause(pce);
        } catch (SAXException se) {
            throw (IOException) new IOException().initCause(se);
        } catch (TransformerConfigurationException tce) {
            throw (IOException) new IOException().initCause(tce);
        }
    }
    
    /**
     * Loads a document from the XML in the given input stream, 
     * applying the configured from-XML transformations.
     */
    /**
     * Loads a document from the XML in the given input stream, 
     * applying the configured from-XML transformations.
     */
    public synchronized Document loadFromXml(InputStream istream) throws IOException {
        JDOMResult result = new JDOMResult();
        try {
            // build the filter chain for the stream and transform into a JDOM result
            transformer.transform(fromXmlSetup(istream), result);
        } catch (TransformerException exc) { 
            throw (IOException) new IOException().initCause(exc);
        }
        return result.getDocument();
    }
    
    /**
     * Loads a document from the XML file with the given filename, 
     * applying the configured from-XML transformations.
     */
    /**
     * Loads a document from the XML file with the given filename, 
     * applying the configured from-XML transformations.
     *
     * @param filename the XML file to read
     * @return the (possibly transformed) document
     * @throws IOException if reading or transformation fails
     */
    public synchronized Document loadFromXml(String filename) throws IOException {
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(filename));
        try {
            return loadFromXml(bis);
        } finally {
            // close the stream even when the transformation throws
            bis.close();
        }
    }
    

    // does setup for LF to XML transformation, and returns a SAXSource for the given source
    // nb: need a new filter chain one for each use (perhaps due to an underyling bug)
    private SAXSource toXmlSetup(Source source) throws IOException {
        try {
            // initialize transformer
            initializeTransformers();
            // load transformations
            if (toXmlTemplates == null) {
                toXmlTemplates = new Templates[toXmlTransforms.length];
                for (int i = 0; i < toXmlTemplates.length; i++) {
                    // File file = new File(toXmlTransforms[i]);
                    // toXmlTemplates[i] = stf.newTemplates(new StreamSource(file));
                    String url = toXmlTransforms[i].toString();
                    toXmlTemplates[i] = stf.newTemplates(new StreamSource(url));
                }
            }
            // set up initial reader
            SAXParser parser = spf.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            // set up chain of filters
            XMLFilter[] filters = new XMLFilter[toXmlTransforms.length];
            for (int i = 0; i < filters.length; i++) {
                // create filter
                filters[i] = stf.newXMLFilter(toXmlTemplates[i]);
                // set parent
                if (i == 0) { filters[0].setParent(reader); }
                else { filters[i].setParent(filters[i-1]); }
            }
            // set final reader/filter
            XMLReader finalReader = (filters.length == 0) ? reader : filters[filters.length-1];
            // set up and return LF to XML SAX source with final reader/filter
            return new SAXSource(finalReader, SAXSource.sourceToInputSource(source));
        } catch (ParserConfigurationException pce) {
            throw (IOException) new IOException().initCause(pce);
        } catch (SAXException se) {
            throw (IOException) new IOException().initCause(se);
        } catch (TransformerConfigurationException tce) {
            throw (IOException) new IOException().initCause(tce);
        }
    }

    /**
     * Saves the given LF with the given target string to an XML file 
     * with the given filename, applying the configured to-XML
     * transformations.
     */
    /**
     * Saves the given LF with the given target string to an XML file 
     * with the given filename, applying the configured to-XML
     * transformations.  Parent directories are created as needed.
     *
     * @throws IOException if the file cannot be written or transformation fails
     */
    public synchronized void saveToXml(LF lf, String target, String filename) throws IOException { 
        // ensure dirs exist for filename
        File file = new File(filename);
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) { parent.mkdirs(); }
        FileOutputStream out = new FileOutputStream(file); 
        try {
            saveToXml(lf, target, out);
        } finally {
            // close the stream even when the transformation throws
            out.close();
        }
    }

    /**
     * Saves the given LF with the given target string as XML to the 
     * given output stream, applying the configured to-XML
     * transformations.
     */
    /**
     * Saves the given LF with the given target string as XML to the 
     * given output stream, applying the configured to-XML
     * transformations.  The stream is flushed but not closed.
     *
     * @throws IOException if transformation or writing fails
     */
    public synchronized void saveToXml(LF lf, String target, OutputStream out) throws IOException { 
        // make doc with XML for LF and target
        Document doc = new Document();
        Element root = new Element("xml");
        doc.setRootElement(root);
        root.addContent(HyloHelper.toXml(lf));
        Element targetElt = new Element("target");
        targetElt.addContent(target);
        root.addContent(targetElt);

        // write transformed doc to the stream
        try {
            // do setup and get source
            Source source = toXmlSetup(new JDOMSource(doc));
            // do transformation
            // nb: uses the platform default charset, as before — TODO: consider making UTF-8 explicit
            OutputStreamWriter writer = new OutputStreamWriter(out);
            transformer.transform(source, new StreamResult(writer));
            // flush the wrapping writer so any buffered chars reach the 
            // underlying stream before the caller closes it
            writer.flush();
        } catch (TransformerException exc) { 
            throw (IOException) new IOException().initCause(exc);
        }
    }

    
    /**
     * Transforms an LF by applying the configured to-XML and from-XML transformations, 
     * then loading the LF from the resulting doc.
     */
    /**
     * Transforms an LF by applying the configured to-XML and from-XML transformations, 
     * then loading the LF from the resulting doc.
     */
    public synchronized LF transformLF(LF lf) throws IOException {
        // round-trip: serialize the LF with the to-XML transforms ...
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        saveToXml(lf, "", buffer);
        // ... then re-parse it with the from-XML transforms
        Document doc = loadFromXml(new ByteArrayInputStream(buffer.toByteArray()));
        return Realizer.getLfFromDoc(doc);
    }
    
    /**
     * Loads an LF by applying the configured from-XML transformations, 
     * then loading the LF from the resulting doc.
     */
    /**
     * Loads an LF by applying the configured from-XML transformations, 
     * then loading the LF from the resulting doc.
     */
    public synchronized LF loadLF(Document doc) throws IOException {
        // serialize the doc to bytes ...
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        serializeXml(doc, buffer);
        // ... then re-parse it with the from-XML transforms applied
        Document transformed = loadFromXml(new ByteArrayInputStream(buffer.toByteArray()));
        return Realizer.getLfFromDoc(transformed);
    }
    
    
    /**
     * Convenience method to serialize XML.
     */
    /**
     * Convenience method to serialize XML to the given output stream.
     */
    public synchronized void serializeXml(Document doc, OutputStream out) throws IOException {
        try {
            initializeTransformers();
            // run the doc through the transformer into a JDOM result, as suggested 
            // by Amy Isard, for better java/xml version compatibility
            JDOMResult result = new JDOMResult();
            transformer.transform(new JDOMSource(doc), result);
            // then serialize with JDOM's own outputter (end of A.I. suggestion)
            new XMLOutputter().output(result.getDocument(), new OutputStreamWriter(out));
        } catch (TransformerException exc) { 
            throw (IOException) new IOException().initCause(exc);
        }
    }

    
    /** 
     * Makes an element for the given LF, applying the configured to-XML transformations.
     */
    /** 
     * Makes an element for the given LF, applying the configured to-XML transformations.
     */
    public synchronized Element makeLfElt(LF lf) throws IOException { 
        // make doc with LF in it
        Document lfDoc = new Document();
        lfDoc.setRootElement(HyloHelper.toXml(lf));
        // apply to-XML transformations
        try {
            // do setup and get source
            Source source = toXmlSetup(new JDOMSource(lfDoc));
            // do transformation and get resulting doc
            JDOMResult result = new JDOMResult();
            transformer.transform(source, result);
            lfDoc = result.getDocument();
        } catch (TransformerException exc) { 
            // wrap as IOException, preserving the cause
            throw (IOException) new IOException().initCause(exc);
        }
        // detach and return the transformed root element
        return lfDoc.detachRootElement();
    }

    
    /** 
     * Returns whether the given string is a recognized pitch accent.
     */
    /** 
     * Returns whether the given string is a recognized pitch accent.
     *
     * @param s the candidate pitch accent string
     * @return true iff s is one of the strings in {@link #pitchAccents}
     */
    public static boolean isPitchAccent(String s) {
        // lazily build the lookup set from the pitchAccents array
        if (pitchAccentsSet == null) {
            pitchAccentsSet = new HashSet<String>(Arrays.asList(pitchAccents));
        }
        return pitchAccentsSet.contains(s);
    }
    
    /** 
     * Returns whether the given string is a recognized boundary tone. 
     */
    /** 
     * Returns whether the given string is a recognized boundary tone.
     *
     * @param s the candidate boundary tone string
     * @return true iff s is one of the strings in {@link #boundaryTones}
     */
    public static boolean isBoundaryTone(String s) {
        // lazily build the lookup set from the boundaryTones array
        if (boundaryTonesSet == null) {
            boundaryTonesSet = new HashSet<String>(Arrays.asList(boundaryTones));
        }
        return boundaryTonesSet.contains(s);
    }
    
    
    /**
     * Saves the given sign's words, pitch accents and boundary tones 
     * to an APML file with the given filename.
     */
    /**
     * Saves the given sign's words, pitch accents and boundary tones 
     * to an APML file with the given filename.  Parent directories 
     * are created as needed.
     *
     * @throws IOException if the file cannot be written or transformation fails
     */
    public synchronized void saveToApml(Sign sign, String filename) throws IOException {
        // ensure dirs exist for filename
        File file = new File(filename);
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) { parent.mkdirs(); }
        // do transformation
        FileWriter fw = new FileWriter(file);
        try {
            saveToApml(sign, fw);
        } finally {
            // close the writer even when the transformation throws
            fw.close();
        }
    }
    
    /**
     * Saves the given sign's words, pitch accents and boundary tones 
     * as APML to the given writer.
     * The orthography is first converted to XML using Sign.getWordsInXml, 
     * and then converted to APML using opennlp/ccg/grammar/to-apml.xsl.
     * The string is assumed to be a single performative.
     */
    /**
     * Saves the given sign's words, pitch accents and boundary tones 
     * as APML to the given writer.
     * The orthography is first converted to XML using Sign.getWordsInXml, 
     * and then converted to APML using opennlp/ccg/grammar/to-apml.xsl.
     * The string is assumed to be a single performative.
     */
    public synchronized void saveToApml(Sign sign, Writer writer) throws IOException { 
        // convert the sign's words to XML (no nominals to exclude)
        Document doc = sign.getWordsInXml(new HashSet(0));
        try {
            // ensure the APML transformer is ready, then transform the doc to the writer
            initializeTransformers();
            apmlTransformer.transform(new JDOMSource(doc), new StreamResult(writer));
        } catch (TransformerException exc) { 
            throw (IOException) new IOException().initCause(exc);
        }
    }
    
    
    /** 
     * Returns the words for the given string, as determined by its 
     * first parse, or an empty list, if it cannot be parsed.
     */
    // NB: Could try to extend this to find the parse with the intended LF.
    /** 
     * Returns the words for the given string, as determined by its 
     * first parse, or an empty list, if it cannot be parsed.
     */
    // NB: Could try to extend this to find the parse with the intended LF.
    public List<Word> getParsedWords(String s) {
        // ensure parser instantiated
        if (parser == null) parser = new Parser(this);
        // get parses
        try {
            parser.parse(s);
        }
        catch (ParseException pe) {
            return new ArrayList<Word>(0);
        }
        List<Sign> parses = parser.getResult();
        // guard against an empty result list, honoring the documented 
        // contract of returning an empty list when there is no parse
        if (parses.isEmpty()) return new ArrayList<Word>(0);
        // return words of first parse
        return parses.get(0).getWords();
    }


	/**
	 * Returns the name of the loaded grammar, or null if no name was given.
	 */
	public final String getName() { return grammarName; }
}

